library(signal)
library(tidyverse)
library(here)
library(lubridate)
library(dtplyr)
library(sf)
library(knitr)
library(mgcv)
library(future)
library(furrr)
library(progressr)
library(pracma)
handlers("txtprogressbar") # Simple console progress bar
suppressPackageStartupMessages({
library(mgcv)
library(nlme)
})
db_Europa_allobs <- read_csv(
here("data", "clean", "db_Europa_allobs.csv")) %>%
select(PlotObservationID, EUNISa_1, EUNISa_1_descr,
EUNISa_2, EUNISa_2_descr) %>%
mutate(PlotObservationID = factor(PlotObservationID),
EUNISa_1 = factor(EUNISa_1), EUNISa_2 = factor(EUNISa_2))
printall <- function(tibble) {
print(tibble, width = Inf)
}
I got these files using GEE.
These files contain all observations in the ReSurvey database from 2016 onward. In order to avoid computation problems in GEE, biogeographical units that contain more than 4500 points have been subdivided in ArcGIS.
# Set the folder path
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/Satellite_Embeddings"
# List CSV files
csv_files <- list.files(folder_path, full.names = TRUE, recursive = TRUE)
# Function to extract biogeo and unit from the filename
extract_info <- function(filename) {
first_word <- strsplit(filename, "_")[[1]][1]
biogeo <- str_extract(first_word,
"^(ALP|ANA|ARC|ATL|BLACKSEA|BOR|CON|MACARONESIA|MED|PANONIA|STEPPIC)")
unit <- str_remove(first_word, biogeo)
if (is.na(unit) || unit == "") unit <- NA_character_
list(biogeo = biogeo, unit = unit)
}
# Define column types: force RSrvypl to character, others auto-detected
custom_col_types <- cols(
RSrvypl = col_character(),
RSrvyst = col_character(),
default = col_guess()
)
# Read and process each file
data_list <- lapply(csv_files, function(file) {
info <- extract_info(basename(file)) # Use only the filename
# Read the file
df <- read_csv(file, col_types = custom_col_types) %>%
# Remove columns that give column type problems when combining data
select(-starts_with("EUNIS"), -starts_with("ReSurvey")) %>%
mutate(biogeo = info$biogeo, unit = info$unit)
return(df)
})
# Combine all data
data_RS_SatEmb_bands <- bind_rows(data_list) %>%
rename(PlotObservationID = PltObID)
# View the resulting tibble
print(data_RS_SatEmb_bands)
# Counts per biogeo and unit
print(data_RS_SatEmb_bands %>% count(biogeo, unit), n = 100)
data_RS_SatEmb_bands <- data_RS_SatEmb_bands %>%
select(-X1, -X2, -X3)
final_RS_data <- data_RS_SatEmb_bands %>%
mutate(PlotObservationID = as.factor(PlotObservationID)) %>%
left_join(db_Europa_allobs)
Read the data:
data_RS_CH <- read_csv(
"C:/Data/MOTIVATE/MOTIVATE_RS_data/Canopy_Height_1m/Europe_points_CanopyHeight_1m.csv")
db_Europa <- read_csv(
here("..", "DB_first_check", "data", "clean","db_Europa_20250107.csv")
)
data_RS_CH_ID <- db_Europa %>%
select(PlotObservationID, obs_unique_id) %>%
right_join(data_RS_CH %>%
# Rename to be able to join on this column
rename(obs_unique_id = obs_unique)) %>%
select(PlotObservationID, canopy_height)
Join:
final_RS_data <- final_RS_data %>%
mutate(PlotObservationID = factor(PlotObservationID)) %>%
left_join(data_RS_CH_ID %>%
mutate(PlotObservationID = factor(PlotObservationID)))
write_tsv(final_RS_data,
here("data", "clean","final_RS_data_SatEmb_20250919.csv"))
sessionInfo()
R version 4.5.1 (2025-06-13 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26100)
Matrix products: default
LAPACK version 3.12.1
locale:
[1] LC_COLLATE=Spanish_Spain.utf8 LC_CTYPE=Spanish_Spain.utf8
[3] LC_MONETARY=Spanish_Spain.utf8 LC_NUMERIC=C
[5] LC_TIME=Spanish_Spain.utf8
time zone: Europe/Madrid
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] pracma_2.4.4 progressr_0.15.1 furrr_0.3.1 future_1.67.0 mgcv_1.9-3
[6] nlme_3.1-168 knitr_1.50 sf_1.0-21 dtplyr_1.3.2 here_1.0.2
[11] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.2 dplyr_1.1.4 purrr_1.1.0
[16] readr_2.1.5 tidyr_1.3.1 tibble_3.3.0 ggplot2_4.0.0 tidyverse_2.0.0
[21] signal_1.8-1
loaded via a namespace (and not attached):
[1] gtable_0.3.6 xfun_0.53 bslib_0.9.0 lattice_0.22-7
[5] tzdb_0.5.0 vctrs_0.6.5 tools_4.5.1 generics_0.1.4
[9] parallel_4.5.1 proxy_0.4-27 pkgconfig_2.0.3 Matrix_1.7-4
[13] KernSmooth_2.23-26 data.table_1.17.8 RColorBrewer_1.1-3 S7_0.2.0
[17] lifecycle_1.0.4 compiler_4.5.1 farver_2.1.2 codetools_0.2-20
[21] htmltools_0.5.8.1 class_7.3-23 sass_0.4.10 yaml_2.3.10
[25] crayon_1.5.3 pillar_1.11.0 jquerylib_0.1.4 MASS_7.3-65
[29] classInt_0.4-11 cachem_1.1.0 parallelly_1.45.1 tidyselect_1.2.1
[33] digest_0.6.37 stringi_1.8.7 listenv_0.9.1 splines_4.5.1
[37] rprojroot_2.1.1 fastmap_1.2.0 grid_4.5.1 cli_3.6.5
[41] magrittr_2.0.3 utf8_1.2.6 e1071_1.7-16 withr_3.0.2
[45] scales_1.4.0 bit64_4.6.0-1 timechange_0.3.0 rmarkdown_2.29
[49] globals_0.18.0 bit_4.6.0 hms_1.1.3 evaluate_1.0.5
[53] rlang_1.1.6 Rcpp_1.1.0 glue_1.8.0 DBI_1.2.3
[57] vroom_1.6.5 rstudioapi_0.17.1 jsonlite_2.0.0 R6_2.6.1
[61] units_0.8-7